In [1]:
# Versão da Linguagem Python
from platform import python_version
print('Versão da Linguagem Python Usada Neste Jupyter Notebook:', python_version())
Versão da Linguagem Python Usada Neste Jupyter Notebook: 3.7.6
In [1]:
# Instala o pacote tweepy
!pip install tweepy
Collecting tweepy
Downloading tweepy-3.8.0-py2.py3-none-any.whl (28 kB)
Requirement already satisfied: requests-oauthlib>=0.7.0 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from tweepy) (1.3.0)
Requirement already satisfied: requests>=2.11.1 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from tweepy) (2.22.0)
Requirement already satisfied: six>=1.10.0 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from tweepy) (1.14.0)
Requirement already satisfied: PySocks>=1.5.7 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from tweepy) (1.7.1)
Requirement already satisfied: oauthlib>=3.0.0 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->tweepy) (3.1.0)
Requirement already satisfied: certifi>=2017.4.17 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from requests>=2.11.1->tweepy) (2019.11.28)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from requests>=2.11.1->tweepy) (3.0.4)
Requirement already satisfied: idna<2.9,>=2.5 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from requests>=2.11.1->tweepy) (2.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /Users/dmpm/opt/anaconda3/lib/python3.7/site-packages (from requests>=2.11.1->tweepy) (1.25.8)
Installing collected packages: tweepy
Successfully installed tweepy-3.8.0
In [1]:
# Importando os módulos Tweepy, Datetime e Json
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from datetime import datetime
import json
Veja no manual em pdf como criar sua API no Twitter e configure as suas chaves abaixo.
In [2]:
# Adicione aqui sua Consumer Key
consumer_key = "xxxxxxxxx"
In [3]:
# Adicione aqui sua Consumer Secret
consumer_secret = "xxxxxxxxx"
In [4]:
# Adicione aqui seu Access Token
access_token = "xxxxxxxxx"
In [5]:
# Adicione aqui seu Access Token Secret
access_token_secret = "xxxxxxxxx"
In [6]:
# Criando as chaves de autenticação
auth = OAuthHandler(consumer_key, consumer_secret)
In [7]:
auth.set_access_token(access_token, access_token_secret)
In [8]:
# Criando uma classe para capturar os stream de dados do Twitter e
# armazenar no MongoDB
class MyListener(StreamListener):
def on_data(self, dados):
tweet = json.loads(dados)
created_at = tweet["created_at"]
id_str = tweet["id_str"]
text = tweet["text"]
obj = {"created_at":created_at,"id_str":id_str,"text":text,}
tweetind = col.insert_one(obj).inserted_id
print (obj)
return True
In [9]:
# Criando o objeto mylistener
mylistener = MyListener()
In [10]:
# Criando o objeto mystream
mystream = Stream(auth, listener = mylistener)
In [11]:
# Importando do PyMongo o módulo MongoClient
from pymongo import MongoClient
In [12]:
# Criando a conexão ao MongoDB
client = MongoClient('localhost', 27017)
In [13]:
# Criando o banco de dados twitterdb
db = client.twitterdb
In [14]:
# Criando a collection "col"
col = db.tweets
In [15]:
# Criando uma lista de palavras chave para buscar nos Tweets
keywords = ['Big Data', 'Python', 'Data Mining', 'Data Science']
In [16]:
# Iniciando o filtro e gravando os tweets no MongoDB
mystream.filter(track=keywords)
{'created_at': 'Thu Apr 19 23:27:20 +0000 2018', 'id_str': '987110427745173504', 'text': '【必要な経験】\n以下のコンピュータ関連の経験\n・PC操作(Office等の一般的なアプリケーションの利用経験)\n・Unix (Linux, MacOS等)上でのプログラミング経験(シェルスクリプト, C, Pythonなど)\n・画… https://t.co/c3mkI6wAuE', '_id': ObjectId('5ad925d8b093151a487bc040')}
{'created_at': 'Thu Apr 19 23:27:21 +0000 2018', 'id_str': '987110432581308417', 'text': 'RT @treventos: #Python, É hora de Aprender!\n\nhttps://t.co/VTx87nzEzx https://t.co/zIXwI3ch2E', '_id': ObjectId('5ad925d9b093151a487bc041')}
{'created_at': 'Thu Apr 19 23:27:21 +0000 2018', 'id_str': '987110433109635072', 'text': 'RT @_nametaketakewo: Pythonの授業の教員が「Mac買った時さ、初めて起動したらあいつ『ようこそ』とか言い出すんだよ。なーにがようこそだよ!人様の家に来てるんだから『お邪魔します』とか『よろしくお願いします』だろうが!」って言い出して爆笑した', '_id': ObjectId('5ad925d9b093151a487bc042')}
{'created_at': 'Thu Apr 19 23:27:22 +0000 2018', 'id_str': '987110438721740800', 'text': 'Why airlines are finally poised to unlock Big Data to enhance the passenger experience https://t.co/4V8DJuPP4S #SPShotels #HotelProfs', '_id': ObjectId('5ad925dbb093151a487bc043')}
{'created_at': 'Thu Apr 19 23:27:33 +0000 2018', 'id_str': '987110482635997185', 'text': "RT @JonTrevithick: The first Python to utter, 'And now for something completely different' was actually Eric Idle, in the 2nd episode. http…", '_id': ObjectId('5ad925e5b093151a487bc044')}
{'created_at': 'Thu Apr 19 23:27:35 +0000 2018', 'id_str': '987110493641830400', 'text': '@LazyBeeScripts I plan to use my profits at serial spamming to help fund a Broadway production of Monty Python and… https://t.co/jaI1G8cdKr', '_id': ObjectId('5ad925e8b093151a487bc045')}
{'created_at': 'Thu Apr 19 23:27:37 +0000 2018', 'id_str': '987110501439094785', 'text': 'RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部のAIセミナー行ってくるやでー」\nぼく「おっいってらっしゃいませ」\n\n~今~\n上司さんメール\n「件名:助けて」\n\nぼく「一体何が」\n\n上司さんメール\n「ずっとpython?とかいうプログラム…', '_id': ObjectId('5ad925eab093151a487bc046')}
{'created_at': 'Thu Apr 19 23:27:38 +0000 2018', 'id_str': '987110504450555904', 'text': 'Importance of “Big Data” สำคัญสำหรับธุรกิจยุค 4.0 อย่างไร - https://t.co/UrnnyqaqkW #BigData #Business', '_id': ObjectId('5ad925eab093151a487bc047')}
{'created_at': 'Thu Apr 19 23:27:38 +0000 2018', 'id_str': '987110504689627136', 'text': 'RT @DozenDolls: @yontengoP @Xatz よくある数式をあえて書かず、\nNNライブラリもあえて使わず、\npythonのnumpy行列計算のコードを積み重ねて\n手書き文字認識が出来るとこまで解説する\n「ゼロからはじめるディープラーニング」\nというオライリー…', '_id': ObjectId('5ad925eab093151a487bc048')}
{'created_at': 'Thu Apr 19 23:27:39 +0000 2018', 'id_str': '987110507545968640', 'text': 'RT @Akira_Kido_N: C# の Linq が python の2倍遅い、は嘘 on @Qiita https://t.co/vYr394r7ca', '_id': ObjectId('5ad925ebb093151a487bc049')}
{'created_at': 'Thu Apr 19 23:27:41 +0000 2018', 'id_str': '987110514982404096', 'text': "@redrock_bball Traded for a four'n'twenty and a killer python.", '_id': ObjectId('5ad925edb093151a487bc04a')}
{'created_at': 'Thu Apr 19 23:27:41 +0000 2018', 'id_str': '987110517318668288', 'text': 'RT @DozenDolls: @yontengoP @Xatz やべ、タイトル間違えてた\n\nhttps://t.co/FFIK8FEAYL\n\nこれね\n原理的なところを独学独習したい人にはおすすめです\n\npython知らない人が読むと\n件の上司の方のように\npythonの説明ば…', '_id': ObjectId('5ad925edb093151a487bc04b')}
{'created_at': 'Thu Apr 19 23:27:42 +0000 2018', 'id_str': '987110519269019649', 'text': 'RT @DozenDolls: @yontengoP @Xatz やべ、タイトル間違えてた\n\nhttps://t.co/FFIK8FEAYL\n\nこれね\n原理的なところを独学独習したい人にはおすすめです\n\npython知らない人が読むと\n件の上司の方のように\npythonの説明ば…', '_id': ObjectId('5ad925eeb093151a487bc04c')}
{'created_at': 'Thu Apr 19 23:27:45 +0000 2018', 'id_str': '987110532229582848', 'text': "RT @carolecadwalla: Wow. This is BIG. @Arron_banks, are you listening to Brittany Kaiser give evidence? She's talking about transferring da…", '_id': ObjectId('5ad925f1b093151a487bc04d')}
{'created_at': 'Thu Apr 19 23:27:45 +0000 2018', 'id_str': '987110532732731392', 'text': 'tinterのラベルフレーム生成をクラスにまとめる時、フレームサイズを指示しないと表示されないのなんでだろ。packのオプションなしにすると表示されないけど、適当にサイズ指定すると表示される。\n#python #tkinter #プログラミング初心者', '_id': ObjectId('5ad925f1b093151a487bc04e')}
{'created_at': 'Thu Apr 19 23:27:45 +0000 2018', 'id_str': '987110534318145536', 'text': 'RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部のAIセミナー行ってくるやでー」\nぼく「おっいってらっしゃいませ」\n\n~今~\n上司さんメール\n「件名:助けて」\n\nぼく「一体何が」\n\n上司さんメール\n「ずっとpython?とかいうプログラム…', '_id': ObjectId('5ad925f2b093151a487bc04f')}
{'created_at': 'Thu Apr 19 23:27:46 +0000 2018', 'id_str': '987110536230948864', 'text': '@leyasupls @HanAlolaSH si tu as un compte netflix, ya les monty python (très cool) et one punch man, pour rigoler un peu', '_id': ObjectId('5ad925f2b093151a487bc050')}
{'created_at': 'Thu Apr 19 23:27:47 +0000 2018', 'id_str': '987110540714631169', 'text': 'have and climb python milkman No to with #diorsauvage', '_id': ObjectId('5ad925f3b093151a487bc051')}
{'created_at': 'Thu Apr 19 23:27:49 +0000 2018', 'id_str': '987110550290087937', 'text': 'Fun consequence of functions as first class values, if you mindlessly call print(print) instead of what you were tr… https://t.co/ye96i1GxFM', '_id': ObjectId('5ad925f5b093151a487bc052')}
{'created_at': 'Thu Apr 19 23:27:52 +0000 2018', 'id_str': '987110564559138816', 'text': 'RT @yontengoP: 休憩中に電話で聞いたところでは、IT寄りセミナーの中でもかなりアグレッシブで\n「じゃあその場で講師がpythonをライブコーディングして書いて、観客からは適宜、歓声や罵声、指摘や質問が飛ぶ」\nという尖りまくった内容らしい。\n\nなお上司さんはプログラ…', '_id': ObjectId('5ad925f9b093151a487bc053')}
{'created_at': 'Thu Apr 19 23:27:58 +0000 2018', 'id_str': '987110586910556160', 'text': 'RT @Avery1776: @BenKTallmadge @JohnBrennan A FEW MORE MENTIONS\nUnderwear Bomber\nDrone Wars\nSpying on Congress\nHacking the State Dept\n"Moder…', '_id': ObjectId('5ad925feb093151a487bc054')}
{'created_at': 'Thu Apr 19 23:27:59 +0000 2018', 'id_str': '987110593390759936', 'text': 'RT @DozenDolls: @yontengoP @Xatz よくある数式をあえて書かず、\nNNライブラリもあえて使わず、\npythonのnumpy行列計算のコードを積み重ねて\n手書き文字認識が出来るとこまで解説する\n「ゼロからはじめるディープラーニング」\nというオライリー…', '_id': ObjectId('5ad925ffb093151a487bc055')}
{'created_at': 'Thu Apr 19 23:28:00 +0000 2018', 'id_str': '987110598352629762', 'text': '@AlexCatto_ i could go a killer python right about now', '_id': ObjectId('5ad92601b093151a487bc056')}
{'created_at': 'Thu Apr 19 23:28:03 +0000 2018', 'id_str': '987110608855281665', 'text': 'RT @drbiomass: Why are data science leaders running for the exit? https://t.co/RJBTLqg7hA #AI #MachineLearning #BigData #DataScience #FinTe…', '_id': ObjectId('5ad92603b093151a487bc057')}
{'created_at': 'Thu Apr 19 23:28:06 +0000 2018', 'id_str': '987110623367413760', 'text': 'RT @BigDataBlogs: Data Science: Top 100 Influencers, Brands and Publications » https://t.co/bYkM2gyAFf #DataScientist #BigData', '_id': ObjectId('5ad92607b093151a487bc058')}
{'created_at': 'Thu Apr 19 23:28:08 +0000 2018', 'id_str': '987110629071728640', 'text': 'My day-old, new pc’s OS install malfunctioned due to a known bug in the latest update, and needed to be clean insta… https://t.co/wn3FDDjDwd', '_id': ObjectId('5ad92608b093151a487bc059')}
{'created_at': 'Thu Apr 19 23:28:15 +0000 2018', 'id_str': '987110657626587137', 'text': 'RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部のAIセミナー行ってくるやでー」\nぼく「おっいってらっしゃいませ」\n\n~今~\n上司さんメール\n「件名:助けて」\n\nぼく「一体何が」\n\n上司さんメール\n「ずっとpython?とかいうプログラム…', '_id': ObjectId('5ad9260fb093151a487bc05a')}
{'created_at': 'Thu Apr 19 23:28:16 +0000 2018', 'id_str': '987110665251831813', 'text': "RT @kevinarnovitz: Snakes in a backpack. How Robert Covington's python, TJ McConnell's coffee addiction, JJ Redick's fascination with Simul…", '_id': ObjectId('5ad92611b093151a487bc05b')}
{'created_at': 'Thu Apr 19 23:28:17 +0000 2018', 'id_str': '987110668313640960', 'text': 'RT @yontengoP: 休憩中に電話で聞いたところでは、IT寄りセミナーの中でもかなりアグレッシブで\n「じゃあその場で講師がpythonをライブコーディングして書いて、観客からは適宜、歓声や罵声、指摘や質問が飛ぶ」\nという尖りまくった内容らしい。\n\nなお上司さんはプログラ…', '_id': ObjectId('5ad92611b093151a487bc05c')}
{'created_at': 'Thu Apr 19 23:28:18 +0000 2018', 'id_str': '987110672776552448', 'text': "RT @onyematomorrow4: Retweeted Mary Mkemdi (@NkemdiMary):\n\nI hope the python Governor don't send his dancing pythons to attack the striking…", '_id': ObjectId('5ad92613b093151a487bc05d')}
{'created_at': 'Thu Apr 19 23:28:27 +0000 2018', 'id_str': '987110708167913472', 'text': '@Chet_Cannon @ChiefFabulous https://t.co/MhOIjDuZVl', '_id': ObjectId('5ad9261bb093151a487bc05e')}
{'created_at': 'Thu Apr 19 23:28:28 +0000 2018', 'id_str': '987110714996211712', 'text': 'オラクル、JavaやJavaScript、Ruby、Pythonなど多言語対応を単一ランタイムで実現する「GraalVM」をオープンソースで公開。Twitterが本番環境で採用 - Publickey https://t.co/sAFBRKz2hb', '_id': ObjectId('5ad9261db093151a487bc05f')}
{'created_at': 'Thu Apr 19 23:28:29 +0000 2018', 'id_str': '987110717999407104', 'text': 'RT @_nametaketakewo: Pythonの授業の教員が「Mac買った時さ、初めて起動したらあいつ『ようこそ』とか言い出すんだよ。なーにがようこそだよ!人様の家に来てるんだから『お邪魔します』とか『よろしくお願いします』だろうが!」って言い出して爆笑した', '_id': ObjectId('5ad9261db093151a487bc060')}
{'created_at': 'Thu Apr 19 23:28:30 +0000 2018', 'id_str': '987110720683753472', 'text': 'RT @_nametaketakewo: Pythonの授業の教員が「Mac買った時さ、初めて起動したらあいつ『ようこそ』とか言い出すんだよ。なーにがようこそだよ!人様の家に来てるんだから『お邪魔します』とか『よろしくお願いします』だろうが!」って言い出して爆笑した', '_id': ObjectId('5ad9261eb093151a487bc061')}
{'created_at': 'Thu Apr 19 23:28:34 +0000 2018', 'id_str': '987110738677284864', 'text': "@toygrind Transformers '86\nHot Fuzz\nMonty Python and the Holy Grail\nThe Big Lebowski", '_id': ObjectId('5ad92622b093151a487bc062')}
{'created_at': 'Thu Apr 19 23:28:38 +0000 2018', 'id_str': '987110756935139328', 'text': 'Really excited for what @wesmckinn is planning https://t.co/O2iuz4fsvY', '_id': ObjectId('5ad92627b093151a487bc063')}
{'created_at': 'Thu Apr 19 23:28:39 +0000 2018', 'id_str': '987110758826827777', 'text': 'Python Developer, #Cambridge, Cambridgeshire, £40,000 - £55,000/annum\xa0 #python https://t.co/ap3pyKS6cx', '_id': ObjectId('5ad92627b093151a487bc064')}
{'created_at': 'Thu Apr 19 23:28:40 +0000 2018', 'id_str': '987110765852110848', 'text': '@MoeAkikaede 错了,给我一个前端我用不来,只能用基于这个前端的wp主题…并且HTML和PHP也才学起走…\nc语言和Python对我来说太遥远🌚', '_id': ObjectId('5ad92629b093151a487bc065')}
{'created_at': 'Thu Apr 19 23:28:43 +0000 2018', 'id_str': '987110776262492160', 'text': 'RT @IainLJBrown: Calgary sports fans are having more fun because of big data\n\nRead more here: https://t.co/gl3ciyEtZY\n\n#BigData #DataScienc…', '_id': ObjectId('5ad9262bb093151a487bc066')}
{'created_at': 'Thu Apr 19 23:28:45 +0000 2018', 'id_str': '987110783657168896', 'text': "RT @DrDenaGrayson: 🔥PALANTIR🔥\n\nIt's far past time for #Palantir founder/#Facebook board member Peter THIEL's turn in the barrel‼️\n\nPalantir…", '_id': ObjectId('5ad9262db093151a487bc067')}
{'created_at': 'Thu Apr 19 23:28:46 +0000 2018', 'id_str': '987110789285797889', 'text': 'RT @_nametaketakewo: Pythonの授業の教員が「Mac買った時さ、初めて起動したらあいつ『ようこそ』とか言い出すんだよ。なーにがようこそだよ!人様の家に来てるんだから『お邪魔します』とか『よろしくお願いします』だろうが!」って言い出して爆笑した', '_id': ObjectId('5ad9262eb093151a487bc068')}
{'created_at': 'Thu Apr 19 23:28:47 +0000 2018', 'id_str': '987110793417129984', 'text': 'RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部のAIセミナー行ってくるやでー」\nぼく「おっいってらっしゃいませ」\n\n~今~\n上司さんメール\n「件名:助けて」\n\nぼく「一体何が」\n\n上司さんメール\n「ずっとpython?とかいうプログラム…', '_id': ObjectId('5ad9262fb093151a487bc069')}
{'created_at': 'Thu Apr 19 23:28:50 +0000 2018', 'id_str': '987110807577096192', 'text': 'RT @yontengoP: 休憩中に電話で聞いたところでは、IT寄りセミナーの中でもかなりアグレッシブで\n「じゃあその場で講師がpythonをライブコーディングして書いて、観客からは適宜、歓声や罵声、指摘や質問が飛ぶ」\nという尖りまくった内容らしい。\n\nなお上司さんはプログラ…', '_id': ObjectId('5ad92633b093151a487bc06a')}
{'created_at': 'Thu Apr 19 23:28:52 +0000 2018', 'id_str': '987110816427139074', 'text': 'RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部のAIセミナー行ってくるやでー」\nぼく「おっいってらっしゃいませ」\n\n~今~\n上司さんメール\n「件名:助けて」\n\nぼく「一体何が」\n\n上司さんメール\n「ずっとpython?とかいうプログラム…', '_id': ObjectId('5ad92635b093151a487bc06b')}
{'created_at': 'Thu Apr 19 23:28:57 +0000 2018', 'id_str': '987110836907855872', 'text': '#OMX #ProjectShivom #Blockchain #healthcare #genomics #cryptocurrency https://t.co/sP0H2w4WYv', '_id': ObjectId('5ad9263ab093151a487bc06c')}
{'created_at': 'Thu Apr 19 23:28:59 +0000 2018', 'id_str': '987110845514575872', 'text': 'RT @yontengoP: 休憩中に電話で聞いたところでは、IT寄りセミナーの中でもかなりアグレッシブで\n「じゃあその場で講師がpythonをライブコーディングして書いて、観客からは適宜、歓声や罵声、指摘や質問が飛ぶ」\nという尖りまくった内容らしい。\n\nなお上司さんはプログラ…', '_id': ObjectId('5ad9263cb093151a487bc06d')}
{'created_at': 'Thu Apr 19 23:29:01 +0000 2018', 'id_str': '987110852116574208', 'text': 'また発音悩む系\nhttps://t.co/P0acOjBTCQ\nオラクル、JavaやJavaScript、Ruby、Pythonなど多言語対応を単一ランタイムで実現する「GraalVM」をオープンソースで公開。Twitterが本番環境で採用', '_id': ObjectId('5ad9263eb093151a487bc06e')}
{'created_at': 'Thu Apr 19 23:29:03 +0000 2018', 'id_str': '987110859980681216', 'text': 'RT @tictoc: Palantir is using War on Terror tools to track American citizens. The scary thing? The data-mining company is desperate for new…', '_id': ObjectId('5ad9263fb093151a487bc06f')}
---------------------------------------------------------------------------
WantReadError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
279 try:
--> 280 return self.connection.recv_into(*args, **kwargs)
281 except OpenSSL.SSL.SysCallError as e:
~/anaconda3/lib/python3.6/site-packages/OpenSSL/SSL.py in recv_into(self, buffer, nbytes, flags)
1624 result = _lib.SSL_read(self._ssl, buf, nbytes)
-> 1625 self._raise_ssl_error(self._ssl, result)
1626
~/anaconda3/lib/python3.6/site-packages/OpenSSL/SSL.py in _raise_ssl_error(self, ssl, result)
1430 if error == _lib.SSL_ERROR_WANT_READ:
-> 1431 raise WantReadError()
1432 elif error == _lib.SSL_ERROR_WANT_WRITE:
WantReadError:
During handling of the above exception, another exception occurred:
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-16-8f0eed42c826> in <module>()
1 # Iniciando o filtro e gravando os tweets no MongoDB
----> 2 mystream.filter(track=keywords)
~/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py in filter(self, follow, track, async, locations, stall_warnings, languages, encoding, filter_level)
448 self.session.params = {'delimited': 'length'}
449 self.host = 'stream.twitter.com'
--> 450 self._start(async)
451
452 def sitestream(self, follow, stall_warnings=False,
~/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py in _start(self, async)
362 self._thread.start()
363 else:
--> 364 self._run()
365
366 def on_closed(self, resp):
~/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py in _run(self)
264 self.snooze_time = self.snooze_time_step
265 self.listener.on_connect()
--> 266 self._read_loop(resp)
267 except (Timeout, ssl.SSLError) as exc:
268 # This is still necessary, as a SSLError can actually be
~/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py in _read_loop(self, resp)
314 length = 0
315 while not resp.raw.closed:
--> 316 line = buf.read_line().strip()
317 if not line:
318 self.listener.keep_alive() # keep-alive new lines are expected
~/anaconda3/lib/python3.6/site-packages/tweepy/streaming.py in read_line(self, sep)
179 else:
180 start = len(self._buffer)
--> 181 self._buffer += self._stream.read(self._chunk_size)
182 return six.b('')
183
~/anaconda3/lib/python3.6/site-packages/urllib3/response.py in read(self, amt, decode_content, cache_content)
382 else:
383 cache_content = False
--> 384 data = self._fp.read(amt)
385 if amt != 0 and not data: # Platform-specific: Buggy versions of Python.
386 # Close the connection when no data is returned
~/anaconda3/lib/python3.6/http/client.py in read(self, amt)
447 # Amount is given, implement using readinto
448 b = bytearray(amt)
--> 449 n = self.readinto(b)
450 return memoryview(b)[:n].tobytes()
451 else:
~/anaconda3/lib/python3.6/http/client.py in readinto(self, b)
481
482 if self.chunked:
--> 483 return self._readinto_chunked(b)
484
485 if self.length is not None:
~/anaconda3/lib/python3.6/http/client.py in _readinto_chunked(self, b)
576 try:
577 while True:
--> 578 chunk_left = self._get_chunk_left()
579 if chunk_left is None:
580 return total_bytes
~/anaconda3/lib/python3.6/http/client.py in _get_chunk_left(self)
544 self._safe_read(2) # toss the CRLF at the end of the chunk
545 try:
--> 546 chunk_left = self._read_next_chunk_size()
547 except ValueError:
548 raise IncompleteRead(b'')
~/anaconda3/lib/python3.6/http/client.py in _read_next_chunk_size(self)
504 def _read_next_chunk_size(self):
505 # Read the next chunk size from the file
--> 506 line = self.fp.readline(_MAXLINE + 1)
507 if len(line) > _MAXLINE:
508 raise LineTooLong("chunk size")
~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
584 while True:
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
588 self._timeout_occurred = True
~/anaconda3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
290 raise
291 except OpenSSL.SSL.WantReadError:
--> 292 rd = util.wait_for_read(self.socket, self.socket.gettimeout())
293 if not rd:
294 raise timeout('The read operation timed out')
~/anaconda3/lib/python3.6/site-packages/urllib3/util/wait.py in wait_for_read(socks, timeout)
31 or optionally a single socket if passed in. Returns a list of
32 sockets that can be read from immediately. """
---> 33 return _wait_for_io_events(socks, EVENT_READ, timeout)
34
35
~/anaconda3/lib/python3.6/site-packages/urllib3/util/wait.py in _wait_for_io_events(socks, events, timeout)
24 selector.register(sock, events)
25 return [key[0].fileobj for key in
---> 26 selector.select(timeout) if key[1] & events]
27
28
~/anaconda3/lib/python3.6/site-packages/urllib3/util/selectors.py in select(self, timeout)
511
512 kevent_list = _syscall_wrapper(self._kqueue.control, True,
--> 513 None, max_events, timeout)
514
515 for kevent in kevent_list:
~/anaconda3/lib/python3.6/site-packages/urllib3/util/selectors.py in _syscall_wrapper(func, _, *args, **kwargs)
62 and recalculate their timeouts. """
63 try:
---> 64 return func(*args, **kwargs)
65 except (OSError, IOError, select.error) as e:
66 errcode = None
KeyboardInterrupt:
In [17]:
mystream.disconnect()
In [18]:
# Verificando um documento no collection
col.find_one()
Out[18]:
{'_id': ObjectId('5ad925d8b093151a487bc040'),
'created_at': 'Thu Apr 19 23:27:20 +0000 2018',
'id_str': '987110427745173504',
'text': '【必要な経験】\n以下のコンピュータ関連の経験\n・PC操作(Office等の一般的なアプリケーションの利用経験)\n・Unix (Linux, MacOS等)上でのプログラミング経験(シェルスクリプト, C, Pythonなど)\n・画… https://t.co/c3mkI6wAuE'}
In [19]:
# criando um dataset com dados retornados do MongoDB
dataset = [{"created_at": item["created_at"], "text": item["text"],} for item in col.find()]
In [2]:
# Importando o módulo Pandas para trabalhar com datasets em Python
import pandas as pd
pd.__version__
Out[2]:
'1.0.3'
In [21]:
# Criando um dataframe a partir do dataset
df = pd.DataFrame(dataset)
In [22]:
# Imprimindo o dataframe
df
Out[22]:
created_at
text
0
Thu Apr 19 23:27:20 +0000 2018
【必要な経験】\n以下のコンピュータ関連の経験\n・PC操作(Office等の一般的なアプリ...
1
Thu Apr 19 23:27:21 +0000 2018
RT @treventos: #Python, É hora de Aprender!\n\...
2
Thu Apr 19 23:27:21 +0000 2018
RT @_nametaketakewo: Pythonの授業の教員が「Mac買った時さ、初め...
3
Thu Apr 19 23:27:22 +0000 2018
Why airlines are finally poised to unlock Big ...
4
Thu Apr 19 23:27:33 +0000 2018
RT @JonTrevithick: The first Python to utter, ...
5
Thu Apr 19 23:27:35 +0000 2018
@LazyBeeScripts I plan to use my profits at se...
6
Thu Apr 19 23:27:37 +0000 2018
RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部の...
7
Thu Apr 19 23:27:38 +0000 2018
Importance of “Big Data” สำคัญสำหรับธุรกิจยุค ...
8
Thu Apr 19 23:27:38 +0000 2018
RT @DozenDolls: @yontengoP @Xatz よくある数式をあえて書かず...
9
Thu Apr 19 23:27:39 +0000 2018
RT @Akira_Kido_N: C# の Linq が python の2倍遅い、は嘘 ...
10
Thu Apr 19 23:27:41 +0000 2018
@redrock_bball Traded for a four'n'twenty and ...
11
Thu Apr 19 23:27:41 +0000 2018
RT @DozenDolls: @yontengoP @Xatz やべ、タイトル間違えてた\...
12
Thu Apr 19 23:27:42 +0000 2018
RT @DozenDolls: @yontengoP @Xatz やべ、タイトル間違えてた\...
13
Thu Apr 19 23:27:45 +0000 2018
RT @carolecadwalla: Wow. This is BIG. @Arron_b...
14
Thu Apr 19 23:27:45 +0000 2018
tinterのラベルフレーム生成をクラスにまとめる時、フレームサイズを指示しないと表示されな...
15
Thu Apr 19 23:27:45 +0000 2018
RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部の...
16
Thu Apr 19 23:27:46 +0000 2018
@leyasupls @HanAlolaSH si tu as un compte netf...
17
Thu Apr 19 23:27:47 +0000 2018
have and climb python milkman No to with #dior...
18
Thu Apr 19 23:27:49 +0000 2018
Fun consequence of functions as first class va...
19
Thu Apr 19 23:27:52 +0000 2018
RT @yontengoP: 休憩中に電話で聞いたところでは、IT寄りセミナーの中でもかなり...
20
Thu Apr 19 23:27:58 +0000 2018
RT @Avery1776: @BenKTallmadge @JohnBrennan A F...
21
Thu Apr 19 23:27:59 +0000 2018
RT @DozenDolls: @yontengoP @Xatz よくある数式をあえて書かず...
22
Thu Apr 19 23:28:00 +0000 2018
@AlexCatto_ i could go a killer python right a...
23
Thu Apr 19 23:28:03 +0000 2018
RT @drbiomass: Why are data science leaders ru...
24
Thu Apr 19 23:28:06 +0000 2018
RT @BigDataBlogs: Data Science: Top 100 Influe...
25
Thu Apr 19 23:28:08 +0000 2018
My day-old, new pc’s OS install malfunctioned ...
26
Thu Apr 19 23:28:15 +0000 2018
RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部の...
27
Thu Apr 19 23:28:16 +0000 2018
RT @kevinarnovitz: Snakes in a backpack. How R...
28
Thu Apr 19 23:28:17 +0000 2018
RT @yontengoP: 休憩中に電話で聞いたところでは、IT寄りセミナーの中でもかなり...
29
Thu Apr 19 23:28:18 +0000 2018
RT @onyematomorrow4: Retweeted Mary Mkemdi (@N...
30
Thu Apr 19 23:28:27 +0000 2018
@Chet_Cannon @ChiefFabulous https://t.co/MhOIj...
31
Thu Apr 19 23:28:28 +0000 2018
オラクル、JavaやJavaScript、Ruby、Pythonなど多言語対応を単一ランタイ...
32
Thu Apr 19 23:28:29 +0000 2018
RT @_nametaketakewo: Pythonの授業の教員が「Mac買った時さ、初め...
33
Thu Apr 19 23:28:30 +0000 2018
RT @_nametaketakewo: Pythonの授業の教員が「Mac買った時さ、初め...
34
Thu Apr 19 23:28:34 +0000 2018
@toygrind Transformers '86\nHot Fuzz\nMonty Py...
35
Thu Apr 19 23:28:38 +0000 2018
Really excited for what @wesmckinn is planning...
36
Thu Apr 19 23:28:39 +0000 2018
Python Developer, #Cambridge, Cambridgeshire, ...
37
Thu Apr 19 23:28:40 +0000 2018
@MoeAkikaede 错了,给我一个前端我用不来,只能用基于这个前端的wp主题…并且HT...
38
Thu Apr 19 23:28:43 +0000 2018
RT @IainLJBrown: Calgary sports fans are havin...
39
Thu Apr 19 23:28:45 +0000 2018
RT @DrDenaGrayson: 🔥PALANTIR🔥\n\nIt's far past...
40
Thu Apr 19 23:28:46 +0000 2018
RT @_nametaketakewo: Pythonの授業の教員が「Mac買った時さ、初め...
41
Thu Apr 19 23:28:47 +0000 2018
RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部の...
42
Thu Apr 19 23:28:50 +0000 2018
RT @yontengoP: 休憩中に電話で聞いたところでは、IT寄りセミナーの中でもかなり...
43
Thu Apr 19 23:28:52 +0000 2018
RT @yontengoP: ~午前中~\n上司さん「俺もAIとか勉強するために今日は外部の...
44
Thu Apr 19 23:28:57 +0000 2018
#OMX #ProjectShivom #Blockchain #healthcare #g...
45
Thu Apr 19 23:28:59 +0000 2018
RT @yontengoP: 休憩中に電話で聞いたところでは、IT寄りセミナーの中でもかなり...
46
Thu Apr 19 23:29:01 +0000 2018
また発音悩む系\nhttps://t.co/P0acOjBTCQ\nオラクル、JavaやJa...
47
Thu Apr 19 23:29:03 +0000 2018
RT @tictoc: Palantir is using War on Terror to...
In [23]:
# Importando o módulo Scikit Learn
from sklearn.feature_extraction.text import CountVectorizer
In [3]:
import sklearn
sklearn.__version__
Out[3]:
'0.22.2'
In [24]:
# Usando o método CountVectorizer para criar uma matriz de documentos
cv = CountVectorizer()
count_matrix = cv.fit_transform(df.text)
In [25]:
# Contando o número de ocorrências das principais palavras em nosso dataset
word_count = pd.DataFrame(cv.get_feature_names(), columns=["word"])
word_count["count"] = count_matrix.sum(axis=0).tolist()[0]
word_count = word_count.sort_values("count", ascending=False).reset_index(drop=True)
word_count[:50]
Out[25]:
word
count
0
rt
29
1
co
20
2
https
20
3
python
14
4
the
13
5
yontengop
13
6
to
11
7
ぼく
10
8
上司さんメール
10
9
and
7
10
for
6
11
data
6
12
ずっとpython
5
13
とかいうプログラム
5
14
件名
5
15
一体何が
5
16
上司さん
5
17
big
5
18
おっいってらっしゃいませ
5
19
俺もaiとか勉強するために今日は外部のaiセミナー行ってくるやでー
5
20
of
5
21
助けて
5
22
午前中
5
23
休憩中に電話で聞いたところでは
4
24
in
4
25
ようこそ
4
26
だろうが
4
27
it寄りセミナーの中でもかなりアグレッシブで
4
28
_nametaketakewo
4
29
is
4
30
なお上司さんはプログラ
4
31
なーにがようこそだよ
4
32
よろしくお願いします
4
33
とか言い出すんだよ
4
34
指摘や質問が飛ぶ
4
35
とか
4
36
という尖りまくった内容らしい
4
37
歓声や罵声
4
38
are
4
39
観客からは適宜
4
40
xatz
4
41
じゃあその場で講師がpythonをライブコーディングして書いて
4
42
mac買った時さ
4
43
palantir
4
44
bigdata
4
45
dozendolls
4
46
pythonの授業の教員が
4
47
人様の家に来てるんだから
4
48
お邪魔します
4
49
初めて起動したらあいつ
4
Content source: dsacademybr/PythonFundamentos
Similar notebooks: